bitkeeper revision 1.1108.14.1 (4104cfa1bp3FkfR2vYJwebaywi6JAg)
author mjw@wray-m-3.hpl.hp.com <mjw@wray-m-3.hpl.hp.com>
Mon, 26 Jul 2004 09:32:17 +0000 (09:32 +0000)
committer mjw@wray-m-3.hpl.hp.com <mjw@wray-m-3.hpl.hp.com>
Mon, 26 Jul 2004 09:32:17 +0000 (09:32 +0000)
Stop a domain restarting if it's too soon since the last one.

tools/python/xen/xend/XendDomain.py
tools/python/xen/xend/XendDomainInfo.py

index f44b5c1ad5a820833052f84447f1f569dd4c3af6..6f2ac9192830096d949081bce33d573bce341e9f 100644 (file)
@@ -130,9 +130,11 @@ class XendDomain:
     def initial_refresh(self):
         """Refresh initial domain info from domain_db.
         """
-        #for d in self.domain_db.values(): print 'db dom=', d
+            
+        def cb_all_ok(val):
+            self.refresh()
+
         domlist = xc.domain_getinfo()
-        #for d in domlist: print 'xc dom=', d
         doms = {}
         for d in domlist:
             domid = str(d['dom'])
@@ -140,22 +142,13 @@ class XendDomain:
         dlist = []
         for config in self.domain_db.values():
             domid = str(sxp.child_value(config, 'id'))
-            #print "dom=", domid, "config=", config
             if domid in doms:
-                #print "dom=", domid, "new"
-                deferred = self._new_domain(config, doms[domid])
-                dlist.append(deferred)
+                d_dom = self._new_domain(config, doms[domid])
+                dlist.append(d_dom)
             else:
-                #print "dom=", domid, "del"
                 self._delete_domain(domid)
-        deferred = defer.DeferredList(dlist, fireOnOneErrback=1)
-        def cbok(val):
-            #print "doms:"
-            #for d in self.domain.values(): print 'dom', d
-            self.refresh()
-            #print "XendDomain>initial_refresh> doms:"
-            #for d in self.domain.values(): print 'dom', d
-        deferred.addCallback(cbok)
+        d_all = defer.DeferredList(dlist, fireOnOneErrback=1)
+        d_all.addCallback(cb_all_ok)
 
     def sync(self):
         """Sync domain db to disk.
@@ -179,10 +172,13 @@ class XendDomain:
         @param info:      domain info from xen
         @return: deferred
         """
-        deferred = XendDomainInfo.vm_recreate(savedinfo, info)
-        def fn(dominfo):
+        def cbok(dominfo):
             self.domain[dominfo.id] = dominfo
-        deferred.addCallback(fn)
+            if dominfo.restart_pending():
+                self.domain_restart_add(dominfo)
+        
+        deferred = XendDomainInfo.vm_recreate(savedinfo, info)
+        deferred.addCallback(cbok)
         return deferred
 
     def _add_domain(self, id, info, notify=1):
@@ -250,9 +246,9 @@ class XendDomain:
             if id not in self.domain:
                 savedinfo = None
                 deferred = XendDomainInfo.vm_recreate(savedinfo, d)
-                def fn(dominfo):
+                def cbok(dominfo):
                     self._add_domain(dominfo.id, dominfo)
-                deferred.addCallback(fn)
+                deferred.addCallback(cbok)
         # Remove entries for domains that no longer exist.
         for d in self.domain.values():
             info = doms.get(d.id)
@@ -313,11 +309,11 @@ class XendDomain:
         @param config: configuration
         @return: deferred
         """
-        deferred = XendDomainInfo.vm_create(config)
-        def fn(dominfo):
+        def cbok(dominfo):
             self._add_domain(dominfo.id, dominfo)
             return dominfo
-        deferred.addCallback(fn)
+        deferred = XendDomainInfo.vm_create(config)
+        deferred.addCallback(cbok)
         return deferred
 
     def domain_restart(self, dominfo):
@@ -326,11 +322,12 @@ class XendDomain:
         @param dominfo: domain object
         @return: deferred
         """
-        deferred = dominfo.restart()
-        def fn(dominfo):
+        def cbok(dominfo):
             self._add_domain(dominfo.id, dominfo)
             return dominfo
-        deferred.addCallback(fn)
+        log.info("Restarting domain: id=%s name=%s", dominfo.id, dominfo.name)
+        deferred = dominfo.restart()
+        deferred.addCallback(cbok)
         return deferred        
 
     def domain_configure(self, id, config):
@@ -348,11 +345,11 @@ class XendDomain:
             raise XendError("Invalid domain: " + str(id))
         if dominfo.config:
             raise XendError("Domain already configured: " + str(id))
-        def fn(dominfo):
+        def cbok(dominfo):
             self._add_domain(dominfo.id, dominfo)
             return dominfo
         deferred = dominfo.construct(config)
-        deferred.addCallback(fn)
+        deferred.addCallback(cbok)
         return deferred
     
     def domain_restore(self, src, progress=0):
@@ -363,11 +360,11 @@ class XendDomain:
         @return: deferred
         """
         
-        def fn(dominfo):
+        def cbok(dominfo):
             self._add_domain(dominfo.id, dominfo)
             return dominfo
         deferred = XendDomainInfo.vm_restore(src, progress=progress)
-        deferred.addCallback(fn)
+        deferred.addCallback(cbok)
         return deferred
     
     def domain_get(self, id):
@@ -439,9 +436,12 @@ class XendDomain:
         restart = (force and reason == 'reboot') or dominfo.restart_needed(reason)
         if restart:
             dominfo.restarting()
-            self.restarts[id] = dominfo
-            log.info('Scheduling restart for domain: id=%s name=%s', id, dominfo.name)
-            self.domain_restarts_schedule()
+            self.domain_restart_add(dominfo)
+
+    def domain_restart_add(self, dominfo):
+        self.restarts[dominfo.id] = dominfo
+        log.info('Scheduling restart for domain: id=%s name=%s', dominfo.id, dominfo.name)
+        self.domain_restarts_schedule()
             
     def domain_restart_cancel(self, id):
         """Cancel any restart scheduled for a domain.
@@ -450,6 +450,7 @@ class XendDomain:
         """
         dominfo = self.restarts.get(id)
         if dominfo:
+            log.info('Cancelling restart for domain: id=%s name=%s', dominfo.id, dominfo.name)
             dominfo.restart_cancel()
             del self.restarts[id]
 
@@ -465,18 +466,18 @@ class XendDomain:
             # Remove it from the restarts.
             del self.restarts[id]
             try:
-                log.info('domain_restarts> restart: id=%s config=%s', id, str(dominfo.config))
                 def cbok(dominfo):
-                    log.info('Restarted domain %s as %s', id, dominfo.id)
+                    log.info('Restarted domain id=%s as %s', id, dominfo.id)
                     self.domain_unpause(dominfo.id)
                 def cberr(err):
-                    log.exception("Delayed exception restarting domain")
+                    log.exception("Delayed exception restarting domain: id=%s", id)
                 deferred = self.domain_restart(dominfo)
                 deferred.addCallback(cbok)
                 deferred.addErrback(cberr)
             except:
-                log.exception("Exception restarting domain")
+                log.exception("Exception restarting domain: id=%s", id)
         if len(self.restarts):
+            # Run again later if any restarts remain.
             self.refresh_schedule(delay=5)
         
     def final_domain_destroy(self, id):
@@ -487,7 +488,7 @@ class XendDomain:
         dom = int(id)
         if dom <= 0:
             return 0
-        log.info('Destroying domain %s', str(id))
+        log.info('Destroying domain: id=%s', str(id))
         eserver.inject('xend.domain.destroy', id)
         dominfo = self.domain.get(id)
         if dominfo:
index 2a2743d3ef2bbf22ccb5034df9ac84ba95b52699..f2a866bea699fbc3ba9b153f33de4f48f0f18312 100644 (file)
@@ -65,6 +65,9 @@ restart_modes = [
 STATE_RESTART_PENDING = 'pending'
 STATE_RESTART_BOOTING = 'booting'
 
+STATE_VM_OK         = "ok"
+STATE_VM_TERMINATED = "terminated"
+
 def shutdown_reason(code):
     """Get a shutdown reason from a code.
 
@@ -273,7 +276,11 @@ def vm_recreate(savedinfo, info):
     vm.memory = info['mem_kb']/1024
     start_time = sxp.child_value(savedinfo, 'start_time')
     if start_time is not None:
-        vm.startTime = float(start_time)
+        vm.start_time = float(start_time)
+    vm.restart_state = sxp.child_value(savedinfo, 'restart_state')
+    restart_time = sxp.child_value(savedinfo, 'restart_time')
+    if restart_time is not None:
+        vm.restart_time = float(restart_time)
     config = sxp.child_value(savedinfo, 'config')
     if config:
         d = vm.construct(config)
@@ -339,15 +346,16 @@ def _vm_configure2(val, vm):
 class XendDomainInfo:
     """Virtual machine object."""
 
-    STATE_OK = "ok"
-    STATE_TERMINATED = "terminated"
+    """Minimum time between domain restarts in seconds.
+    """
+    MINIMUM_RESTART_TIME = 10
 
     def __init__(self):
         self.recreate = 0
         self.config = None
         self.id = None
         self.dom = None
-        self.startTime = None
+        self.start_time = None
         self.name = None
         self.memory = None
         self.image = None
@@ -361,11 +369,12 @@ class XendDomainInfo:
         self.blkif_backend = 0
         self.netif_backend = 0
         #todo: state: running, suspended
-        self.state = self.STATE_OK
+        self.state = STATE_VM_OK
         #todo: set to migrate info if migrating
         self.migrate = None
         self.restart_mode = RESTART_ONREBOOT
         self.restart_state = None
+        self.restart_time = None
         self.console_port = None
 
     def setdom(self, dom):
@@ -412,13 +421,17 @@ class XendDomainInfo:
             sxpr.append(['cpu', self.info['cpu']])
             sxpr.append(['cpu_time', self.info['cpu_time']/1e9])    
             
-        if self.startTime:
-            upTime =  time.time() - self.startTime  
-            sxpr.append(['up_time', str(upTime) ])
-            sxpr.append(['start_time', str(self.startTime) ])
+        if self.start_time:
+            up_time =  time.time() - self.start_time  
+            sxpr.append(['up_time', str(up_time) ])
+            sxpr.append(['start_time', str(self.start_time) ])
 
         if self.console:
             sxpr.append(self.console.sxpr())
+        if self.restart_state:
+            sxpr.append(['restart_state', self.restart_state])
+        if self.restart_time:
+            sxpr.append(['restart_time', str(self.restart_time)])
         if self.config:
             sxpr.append(['config', self.config])
         return sxpr
@@ -551,13 +564,13 @@ class XendDomainInfo:
     def cleanup(self):
         """Cleanup vm resources: release devices.
         """
-        self.state = self.STATE_TERMINATED
+        self.state = STATE_VM_TERMINATED
         self.release_devices()
 
     def is_terminated(self):
         """Check if a domain has been terminated.
         """
-        return self.state == self.STATE_TERMINATED
+        return self.state == STATE_VM_TERMINATED
 
     def release_devices(self):
         """Release all vm devices.
@@ -617,8 +630,8 @@ class XendDomainInfo:
         log.debug('init_domain> Created domain=%d name=%s memory=%d', dom, name, memory)
         self.setdom(dom)
 
-        if self.startTime is None:
-            self.startTime = time.time()
+        if self.start_time is None:
+            self.start_time = time.time()
 
     def build_domain(self, ostype, kernel, ramdisk, cmdline, vifs_n):
         """Build the domain boot image.
@@ -628,7 +641,6 @@ class XendDomainInfo:
             log.warning('kernel cmdline too long, domain %d', self.dom)
         dom = self.dom
         buildfn = getattr(xc, '%s_build' % ostype)
-        #print 'build_domain>', ostype, dom, kernel, cmdline, ramdisk
         flags = 0
         if self.netif_backend: flags |= SIF_NET_BE_DOMAIN
         if self.blkif_backend: flags |= SIF_BLK_BE_DOMAIN
@@ -753,8 +765,31 @@ class XendDomainInfo:
     def restarting(self):
         self.restart_state = STATE_RESTART_PENDING
 
+    def restart_pending(self):
+        return self.restart_state == STATE_RESTART_PENDING
+
+    def restart_check(self):
+        """Check if domain restart is OK.
+        To prevent restart loops, raise an error if it is
+        less than MINIMUM_RESTART_TIME seconds since the last restart.
+        """
+        tnow = time.time()
+        if self.restart_time is not None:
+            tdelta = tnow - self.restart_time
+            if tdelta < self.MINIMUM_RESTART_TIME:
+                msg = 'VM %d restarting too fast' % self.dom
+                log.error(msg)
+                raise VmError(msg)
+        self.restart_time = tnow
+
     def restart(self):
+        """Restart the domain after it has exited.
+        Reuses the domain id and console port.
+
+        @return: deferred
+        """
         try:
+            self.restart_check()
             self.restart_state = STATE_RESTART_BOOTING
             d = self.construct(self.config)
         finally: